Skip to main content

Read Data

Common examples for reading geospatial data in Fused.

Python Packages

geopandas

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/subway_stations.geojson"):
    """Read the vector file at ``path`` with GeoPandas and return the result."""
    import geopandas

    stations = geopandas.read_file(path)
    return stations

shapely

@fused.udf
def udf():
    """Build a small GeoDataFrame from hand-written shapely geometries.

    Returns:
        A GeoDataFrame holding two points and one polygon, labelled by a
        ``type`` column and created with ``crs=4326``.
    """
    import geopandas as gpd
    from shapely.geometry import Point, Polygon

    # Two standalone points, then one four-cornered polygon.
    point_coords = [(-122.4, 37.8), (-122.3, 37.7)]
    ring = [(-122.5, 37.7), (-122.3, 37.7), (-122.3, 37.9), (-122.5, 37.9)]

    shapes = [Point(x, y) for x, y in point_coords]
    shapes.append(Polygon(ring))

    labels = {'type': ['point', 'point', 'polygon']}
    return gpd.GeoDataFrame(labels, geometry=shapes, crs=4326)

duckdb

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/housing_2024.parquet"):
    """Query a file with DuckDB and return up to 1000 rows as a DataFrame.

    Args:
        path: Location of the file to scan; it is spliced into the SQL text
            as a quoted string literal.

    Returns:
        The rows whose ``latitude`` is not NULL, capped at 1000.
    """
    import duckdb

    # ``path`` is a caller-supplied parameter, so double any single quotes
    # to keep it inside the SQL string literal.
    quoted_path = path.replace("'", "''")

    # The context manager closes the connection even if the query fails.
    with duckdb.connect() as conn:
        result = conn.execute(f"""
            SELECT *
            FROM '{quoted_path}'
            WHERE latitude IS NOT NULL
            LIMIT 1000
        """).df()

    return result

rioxarray

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/elevation.tif"):
    """Open a raster with rioxarray and return its first 1000 rows as a table.

    Args:
        path: Location of the raster file to open.

    Returns:
        The first 1000 rows of the raster converted to a DataFrame, with the
        index reset so coordinates become regular columns.
    """
    import rioxarray as rxr
    import xarray as xr

    # Read raster data with rioxarray
    raster = rxr.open_rasterio(path)

    # ``DataArray.to_dataframe`` raises ValueError on an unnamed array, so
    # give it a column name when the file did not provide one.
    if isinstance(raster, xr.DataArray) and raster.name is None:
        raster = raster.rename("value")

    # Convert to DataFrame for display
    df = raster.to_dataframe().reset_index()

    return df.head(1000)

xarray

@fused.udf
def udf():
    """Download a NetCDF sample file and return its first 1000 rows.

    Returns:
        The first 1000 rows of the dataset converted to a DataFrame, with
        the index reset.
    """
    import xarray as xr

    # Download NetCDF data to mount disk for proper reading
    path = fused.download(
        's3://fused-sample/demo_data/2025_01_01_ERA5_surface.nc',
        '2025_01_01_ERA5_surface.nc',
    )

    # Close the file handle once the data has been converted to a DataFrame.
    with xr.open_dataset(path) as ds:
        df = ds.to_dataframe().reset_index()

    return df.head(1000)

Table Formats (Vector)

GeoJSON (.geojson, .json)

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states.geojson"):
    """Load the GeoJSON file at ``path`` via ``geopandas.read_file``."""
    from geopandas import read_file

    states = read_file(path)
    return states

Shapefile (.shp + .shx, .dbf, .prj)

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states_shapefile.shp"):
    """Load the shapefile at ``path`` via ``geopandas.read_file``."""
    import geopandas

    shapefile_table = geopandas.read_file(path)
    return shapefile_table

GeoPackage (.gpkg)

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states_geopackage.gpkg"):
    """Load the GeoPackage at ``path`` via ``geopandas.read_file``."""
    from geopandas import read_file

    geopackage_table = read_file(path)
    return geopackage_table

KML/KMZ (.kml, .kmz)

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states.kml"):
    """Load the KML file at ``path`` via ``geopandas.read_file``."""
    import geopandas

    kml_table = geopandas.read_file(path)
    return kml_table

Parquet (.parquet)

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/buildings.parquet"):
    """Load the Parquet file at ``path`` via ``geopandas.read_parquet``."""
    from geopandas import read_parquet

    buildings = read_parquet(path)
    return buildings

CSV with coordinates (.csv)

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/subway_stations.csv"):
    """Read a CSV with coordinate columns and return it as a GeoDataFrame.

    Args:
        path: Location of the CSV file; it must contain ``longitude`` and
            ``latitude`` columns.

    Returns:
        A GeoDataFrame with one point per row, created with ``crs=4326``.
    """
    import pandas as pd
    import geopandas as gpd

    # Read CSV
    df = pd.read_csv(path)

    # Convert to GeoDataFrame; points_from_xy builds the point geometries
    # directly, so no shapely import is needed here.
    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df.longitude, df.latitude),
        crs=4326,
    )

    return gdf

Excel (.xlsx)

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/subway_stations.xlsx"):
    """Read an Excel sheet and return it, geocoded when coordinates exist.

    Args:
        path: Location of the Excel file to read.

    Returns:
        A GeoDataFrame with point geometry (``crs=4326``) when the sheet has
        both ``longitude`` and ``latitude`` columns; otherwise the plain
        DataFrame as read.
    """
    import pandas as pd
    import geopandas as gpd

    # Read Excel file
    df = pd.read_excel(path)

    # Without both coordinate columns there is nothing to build geometry from.
    if 'longitude' not in df.columns or 'latitude' not in df.columns:
        return df

    # points_from_xy builds the point geometries directly, so no shapely
    # import is needed here.
    return gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df.longitude, df.latitude),
        crs=4326,
    )

Array Formats (Raster)

GeoTIFF (.tif, .tiff)

@fused.udf
def udf(
    path: str = 's3://fused-sample/demo_data/satellite_imagery/wildfires.tiff'
):
    """Read a raster with rasterio and return its data together with its bounds."""
    import rasterio

    # Both values are read while the dataset is open; the file is closed on exit.
    with rasterio.open(path) as dataset:
        extent = dataset.bounds
        pixels = dataset.read()

    return pixels, extent

NetCDF (.nc)

@fused.udf
def udf():
    """Download a NetCDF sample file and return its first 1000 rows.

    Returns:
        The first 1000 rows of the dataset converted to a DataFrame, with
        the index reset.
    """
    import xarray as xr

    # Download to mount disk for proper NetCDF reading
    path = fused.download('s3://fused-sample/demo_data/climate_data.nc', 'climate_data.nc')

    # Open NetCDF dataset; the context manager closes the file handle once
    # the data has been converted to a DataFrame.
    with xr.open_dataset(path) as ds:
        df = ds.to_dataframe().reset_index()

    return df.head(1000)

STAC Catalog

Earth on AWS

@fused.udf
def udf(
    bounds: fused.types.Bounds = [-77.083, 38.804, -76.969, 38.983],
):
    """Load elevation data for ``bounds`` from the Earth Search STAC catalog.

    Args:
        bounds: Bounding box passed as ``bbox`` to both the STAC search and
            the raster load.

    Returns:
        A tuple of the loaded ``data`` band (cast to int) and ``bounds``.
    """
    import odc.stac
    import pystac_client

    # This catalog is read with unsigned S3 requests, so no signing helper
    # (such as planetary_computer) is needed.
    odc.stac.configure_s3_access(aws_unsigned=True)
    catalog = pystac_client.Client.open("https://earth-search.aws.element84.com/v1")

    # Loading Elevation model
    items = catalog.search(
        collections=["cop-dem-glo-30"],
        bbox=bounds,
    ).item_collection()

    xarray_dataset = odc.stac.load(
        items,
        crs="EPSG:3857",
        bands=["data"],
        resolution=150,
        bbox=bounds,
    ).astype(int)

    return xarray_dataset["data"], bounds

Microsoft Planetary Computer

@fused.udf
def udf(
    bounds: fused.types.Bounds = [-122.463,37.755,-122.376,37.803],
):
    """Load elevation data for ``bounds`` from the Planetary Computer STAC API.

    Returns:
        A tuple of the loaded ``data`` band (cast to int) and ``bounds``.
    """
    import odc.stac
    import planetary_computer
    import pystac_client

    # Open the catalog with the planetary_computer in-place signing modifier.
    stac_url = "https://planetarycomputer.microsoft.com/api/stac/v1"
    client = pystac_client.Client.open(
        stac_url,
        modifier=planetary_computer.sign_inplace,
    )

    # Find the elevation model items that intersect the bounding box.
    search = client.search(collections=["cop-dem-glo-30"], bbox=bounds)
    dem_items = search.item_collection()

    # Load the single "data" band, then cast it to int.
    loaded = odc.stac.load(
        dem_items,
        crs="EPSG:3857",
        bands=["data"],
        resolution=150,
        bbox=bounds,
    )
    elevation = loaded.astype(int)

    return elevation["data"], bounds